const req = require('request');
const cheerio = require('cheerio');
var fs = require("fs");
var readline = require('readline');

// Entry point: scan the list files and queue every poem id they contain.
// The two commented-out calls are single-item entry points kept for manual runs.
// main('19f9f352c186');
getfolder();
// getfolderone('https://so.gushiwen.org/shiwenv_33cbdb2cf9b3.aspx');

// Queue of poem ids waiting to be handed to main(); getfolder() pushes onto it.
const readlist = [];

// Wait 5 s so getfolder() has time to fill the queue, then drain one id per timer tick.
setTimeout(() => {
  console.log('开始读取');
  const drainTimer = setInterval(() => {
    const key = readlist.shift();
    console.log(`数组长度：${readlist.length}  ${key}`);
    if (key == null) {
      // Queue exhausted: stop polling and do NOT fall through to main(),
      // which would otherwise be invoked with an undefined id.
      clearInterval(drainTimer);
      return;
    }
    try {
      main(key);
    } catch (error) {
      console.log(error);
    }
  }, 1);
}, 5000);

/**
 * Scans the list folder and queues every poem id found in its files.
 *
 * Every non-directory entry directly under the root folder (except `目录.txt`)
 * is read line by line. When a line contains a `shiwenv_<id>.aspx` link, the
 * captured id is pushed onto the module-level `readlist` queue; lines without
 * such a link are skipped. All reading is asynchronous, so the queue is filled
 * after this function has returned.
 */
function getfolder() {
  const rootpath = 'E:\\爬虫目录\\temp\\古诗';
  // Literal dot before "aspx"; the id is everything between "shiwenv_" and ".aspx" without "&".
  const keyPattern = /shiwenv_([^&]+)\.aspx/i;

  fs.readdir(rootpath, (err, menu) => {
    if (err) {
      // Nothing to scan (missing folder, no permission, ...): report instead of failing silently
      console.log(err);
      return;
    }
    menu.forEach((ele) => {
      if (ele === '目录.txt') return;
      const readfile = `${rootpath}\\${ele}`;
      // The entry may have vanished since readdir listed it
      if (!fs.existsSync(readfile)) return;
      if (fs.lstatSync(readfile).isDirectory()) return;

      const fRead = fs.createReadStream(readfile);
      // An unhandled stream 'error' event would terminate the process
      fRead.on('error', (streamErr) => console.log(streamErr));
      const objReadline = readline.createInterface({
        input: fRead,
      });
      objReadline.on('line', (line) => {
        const result = line.match(keyPattern);
        // match() returns null for lines without a poem link (e.g. blank lines)
        if (result) readlist.push(result[1]);
      });
    });
  });
}


/**
 * Processes a single poem identified by a line of text containing its page link.
 *
 * The id is the part captured between `shiwenv_` and `.aspx`; it is passed to
 * main(). A line without such a link is reported and ignored.
 *
 * @param {string} line - Text containing a `shiwenv_<id>.aspx` link.
 */
function getfolderone(line) {
  const result = typeof line === 'string' ? line.match(/shiwenv_([^&]+)\.aspx/i) : null;
  if (!result) {
    // match() yields null when there is no link; indexing it would throw
    console.log('no shiwenv id found in: ' + line);
    return;
  }
  main(result[1]);
}

/**
 * Processes one poem id, reusing the cached page when one exists.
 *
 * If the cache file for `key` exists and is non-empty, its content is passed
 * to spliter() together with the key; otherwise (no file, or an empty file)
 * the page is downloaded through get().
 *
 * @param {string} key - Poem id as it appears in `shiwenv_<key>.aspx`.
 */
function main(key) {
  // Without an id there is no page to load; avoids requesting "shiwenv_undefined"
  if (key == null) return;

  const tempfile = 'E:\\爬虫目录\\temp\\古诗内容\\shiwenv_' + key + '.txt';
  if (!fs.existsSync(tempfile)) {
    get(key);
    return;
  }

  const data = fs.readFileSync(tempfile, 'utf-8');
  if (data.length === 0) {
    // An empty cache file is treated as "not downloaded yet"; the key must be forwarded
    get(key);
  } else {
    // spliter() needs the key to request the translation for this poem
    spliter(data, key);
  }
}

/**
 * Downloads the poem page for `key`, caches it on disk and passes it to spliter().
 *
 * The page is only cached and parsed when the request succeeded with HTTP 200;
 * any other outcome is logged and the poem is skipped.
 *
 * @param {string} key - Poem id as it appears in `shiwenv_<key>.aspx`.
 */
function get(key) {
  const tempfile = 'E:\\爬虫目录\\temp\\古诗内容\\shiwenv_' + key + '.txt';
  const url = 'https://so.gushiwen.org/shiwenv_' + key + '.aspx';
  req.get({
    url: url,
    headers: {
      'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
      'referer': 'https://so.gushiwen.org/shiwen/'
    },
    encoding: 'utf-8'
  }, (err, res, body) => {
    if (err || res.statusCode !== 200) {
      // Do not cache or parse a failed response: body is undefined or an error page
      console.log('request failed: ' + url, err || res.statusCode);
      return;
    }
    fs.writeFileSync(tempfile, body);
    // Parsing is deferred to a later timer tick, after the cache file is written
    setTimeout(() => {
      spliter(body, key);
    }, 1);
  });
}

/**
 * Extracts a poem from its page markup, fetches an extra fragment for it and
 * saves everything as one JSON file.
 *
 * Title, dynasty, author, content and tags are read from the page with CSS
 * selectors. The output path is `<root>\<dynasty>\<author>\<title>.txt`, where
 * spaces are removed from the title and `/` is replaced by `或` so it can be
 * used as a file name. If that file already exists nothing is done. Otherwise
 * the `ajaxshiwencont.aspx?...&value=yi` fragment is requested and the HTML of
 * its `<p>` elements is stored under the `yi` key before the JSON is written.
 *
 * @param {string|Buffer} html - Markup of the poem page.
 * @param {string} key - Poem id, used in the fragment request URL and referer.
 * @returns {false|undefined} `false` when the output file already exists,
 *   otherwise `undefined` (the file is written asynchronously).
 */
function spliter(html, key) {
  const $ = cheerio.load(html, { decodeEntities: false });
  const node = $('div.main3>div.left>div.sons>div.cont');
  const title = $('div.main3>div.left>div.sons>div.cont>h1').text();
  const dynastyAuthor = node.first().find('p.source>a');
  const dynasty = dynastyAuthor.first().text();
  const author = dynastyAuthor.last().text();
  const content = node.find('div.contson').text();

  const tags = [];
  const tagsnode = $('div.main3>div.left>div.sons>div.tag>a');
  for (let index = 0; index < tagsnode.length; index++) {
    const tag = $(tagsnode[index]).text();
    if (tag !== '') tags.push(tag);
  }

  // The stored title keeps its original form; only the file name is sanitized
  const obj = { title, dynasty, author, content, tags };

  // Global patterns: a string pattern would only replace the first occurrence
  const fileTitle = title.replace(/ /g, '').replace(/\//g, '或');
  if (fileTitle === '') {
    // No title means the page did not have the expected structure; skip it
    // rather than writing a nameless ".txt" file
    console.log('no title found, skipped: ' + key);
    return;
  }

  const foldername = 'E:\\爬虫目录\\temp\\古诗\\古诗内容\\' + dynasty + '\\' + author;
  const savefile = foldername + '\\' + fileTitle + '.txt';
  if (fs.existsSync(savefile)) {
    return false;
  }

  // Translation / annotation / appreciation
  // (presumably value=yi selects the translation — TODO confirm against the site)
  const ajaxurl = 'https://so.gushiwen.org/nocdn/ajaxshiwencont.aspx?id=' + key + '&value=yi';
  req.get({
    url: ajaxurl,
    headers: {
      // NOTE(review): hard-coded session cookie containing an account identifier;
      // consider moving it out of the source.
      'cookie': 'sec_tc=AQAAAO/6TzawrwkA3ejdudibM68C+wn5; Hm_lvt_04660099568f561a75456483228a9516=1584499418; login=flase; ASP.NET_SessionId=0zx0styuykzqqsdwuxlkfkxk; login=flase; gswZhanghao=zc8023zs%40163.com; codeyzgswso=b3bf2c3ec0319735; Hm_lpvt_04660099568f561a75456483228a9516=1584511947',
      'referer': 'https://so.gushiwen.org/shiwenv_' + key + '.aspx',
      'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    },
    encoding: 'utf-8'
  }, (err, res, body) => {
    if (err || res.statusCode !== 200) {
      console.log('request failed: ' + ajaxurl, err || res.statusCode);
      return;
    }
    const $fragment = cheerio.load(body, { decodeEntities: false });
    obj.yi = $fragment.html('p');
    try {
      // recursive: creates the dynasty and author folders (and any missing
      // parents) in one call and does not fail when they already exist
      fs.mkdirSync(foldername, { recursive: true });
      fs.writeFileSync(savefile, JSON.stringify(obj));
    } catch (error) {
      // A single unwritable file must not take down the whole run
      console.log(error);
    }
  });
}